Getting the Data Together

## Warning: package 'vegan' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.2

Now read in the data files we’ll be working with

macro.data <- read.csv("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/FossilUncertainty/CrownHeight_Data.csv", header=T)
min.tree <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/FossilUncertainty/Macro_MinAges_HKY_Constrained_CON.tre")
#mean.tree <- read.nexus("/Users/Ian/Desktop/Macropod_Dating/Operators/Macro_MeanAges_Constrained/Macro_MeanAges_HKY_Constrained_CON.tre")
#max.tree <- read.nexus("/Users/Ian/Desktop/Macropod_Dating/Operators/Macro_MaxAges_Constrained_PF/Macro_MaxAges_HKY_Constrained_CON_fixed.tre")
#cp.min <- read.nexus("/Users/Ian/Desktop/Macropod_Dating/Operators/Macro_CPMinAges_Constrained_PF/Macro_CP_MinAges_CON.tre")
#cp.mean <- read.nexus("/Users/Ian/Desktop/Macropod_Dating/Operators/Macro_CPEstAges_Constrained_PF/Macro_CP_EstAges_CON.tre")
#cp.max <- read.nexus("/Users/Ian/Desktop/Macropod_Dating/Operators/Macro_CPMaxAges_Constrained_PF/Macro_CP_MaxAges_CON.tre")

Choose the current tree we want to work with

tree <- min.tree

The C4 plant reconstruction data from Andrae

enviro.data <- read.csv("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Andrea_S1.csv", header=T)
head(enviro.data)
##   Age Age_Error C4_recon_mean C4_recon_lower C4_recon_upper
## 1 1.0     0.002          59.2           36.7           81.6
## 2 2.0     0.001          36.6           11.8           61.3
## 3 2.5     0.008          36.0           11.2           60.8
## 4 2.8     0.005          35.4           10.5           60.2
## 5 2.8     0.013          21.8            0.0           48.1
## 6 3.0     0.005          38.8           14.3           63.3

And the dust flux data from Andrae

flux.data <- read.csv("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Aeolian_Flux.csv", header=T)
head(flux.data)
##          Age   A_Flux
## 1 0.05931646 108.3949
## 2 0.10874684 113.8622
## 3 0.20760760 118.6517
## 4 0.25703797 121.9088
## 5 0.35589873 109.4722
## 6 0.40532911 116.3859

Trim tree and data down to overlapping taxa

# extract the taxa that are in both the tree and 
overlaps <- intersect(tree$tip.label, unique(macro.data$Taxon))
trim.tree <- drop.tip(tree, setdiff(tree$tip.label, overlaps))
trim.data <- filter(macro.data, Taxon %in% overlaps)
head(trim.data)
##            ID TI                 Taxon Taxon_notes Status   Higher_tax
## 1    AM_M3164  3 Aepyprymnus_rufescens             Modern   Potoroinae
## 2    AM_M9697  3 Aepyprymnus_rufescens             Modern   Potoroinae
## 3   AM_M14017  2 Aepyprymnus_rufescens             Modern   Potoroinae
## 4    AM_M2267  3 Aepyprymnus_rufescens             Modern   Potoroinae
## 5    AM_B6980  2 Aepyprymnus_rufescens             Modern   Potoroinae
## 6 NMV_P201155  2   Baringa_nelsonensis             Fossil Macropodinae
##   Macropodin_status  Sex Side Assemblage Deposit_type Est_age   PW H_HYPCD
## 1    non_macropodin <NA>    R     Modern         <NA>     0.0 4.39    3.89
## 2    non_macropodin    M    R     Modern         <NA>     0.0 4.33    4.30
## 3    non_macropodin    M    R     Modern         <NA>     0.0 4.61    4.69
## 4    non_macropodin <NA>    R     Modern         <NA>     0.0 4.28    4.36
## 5    non_macropodin    M    R     Modern         <NA>     0.0 3.85    4.14
## 6        macropodin <NA>    R Nelson_Bay     Non_cave     0.8 4.93    5.62

OR trim tree and data down to just Macropodinae

macros <- filter(macro.data, Higher_tax == "Macropodinae")
overlaps <- intersect(tree$tip.label, unique(macros$Taxon))
trim.tree <- drop.tip(tree, setdiff(tree$tip.label, overlaps))
trim.data <- filter(macros, Taxon %in% overlaps)

create a tibble to get the species means (if you haven’t done this already)

  sp.means <- trim.data %>%
    group_by(Taxon) %>%
    summarise_at(vars(H_HYPCD), mean)
  #write.csv(sp.means, row.names=FALSE, file="/PATH/CrownHeight_Macropodinae_spMEANS.csv") # uncomment to write the file

Or you can just read in the file I’ve already prepared for the Macropodinae:

sp.means <- read.csv("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Couzens&Prideaux2018_dryad_package/data/CrownHeight_Macropodinae_spMEANS.csv", header=T)
head(sp.means)
##                   Taxon  H_HYPCD
## 1   Baringa_nelsonensis 7.461111
## 2      Bohra_illuminata 6.280000
## 3     Congruus_congruus 6.865000
## 4  Dendrolagus_dorianus 4.010000
## 5 Dendrolagus_lumholtzi 3.953571
## 6 Dendrolagus_matschiei 4.045000
macro.means <- sp.means$H_HYPCD; names(macro.means) <- sp.means$Taxon
species.means <- as.data.frame(sp.means$H_HYPCD); rownames(species.means) <- sp.means$Taxon

If you’re just working with the Macropodinae, make sure to trim the tree down

macros <- filter(macro.data, Higher_tax == "Macropodinae")
overlaps <- intersect(tree$tip.label, unique(macros$Taxon))
trim.tree <- drop.tip(tree, setdiff(tree$tip.label, overlaps))

We were only given the mean and confidence intervals for the grass data, so let’s make a function to extrapolate data from those mean and confidence intervals

time.estimates <- function (some.data, reps){
  time.col <- NULL; recon.col <- NULL
  for(j in 1:nrow(enviro.data)) {
    
    time.sd <- sd(c(enviro.data$Age[j], (enviro.data$Age[j] + enviro.data$Age_Error[j]), (enviro.data$Age[j] - enviro.data$Age_Error[j])))
    time.samples <- rnorm(reps, enviro.data$Age[j], time.sd)
    #time.samples <- runif(reps, min=(enviro.data$Age[j] - enviro.data$Age_Error[j]), max=(enviro.data$Age[j] + enviro.data$Age_Error[j]))
    time.col <- rbind(time.col, as.data.frame(time.samples))
    
    recon.sd <- sd(c(enviro.data$C4_recon_mean[j], enviro.data$C4_recon_lower[j], enviro.data$C4_recon_upper[j]))
    recon.samples <- rnorm(reps, enviro.data$C4_recon_mean[j], recon.sd/2)
    #recon.samples <- runif(reps, min=enviro.data$C4_recon_lower[j], max=enviro.data$C4_recon_upper[j])
    recon.col <- rbind(recon.col, as.data.frame(recon.samples))
  }
  time.est.samples <- cbind(time.col, recon.col)
  time.est.samples[time.est.samples<0] <- 0
  time.est.samples[time.est.samples>100] <- 100
  time.est.samples <- time.est.samples[order(time.est.samples$time.samples),]
  return(time.est.samples)
}

We’ll actually reconstruct those data now:

enviro <- time.estimates(enviro.data, 100)
plot(enviro)

Visualizing Our Data

Let’s quickly visualize the data in a few different ways to get an idea of what’s going on

# first using Phenogram from Phytools
phenogram(trim.tree, macro.means)

#plotTree.wBars(trim.tree, macro.means, args.plotTree=list(border=F, fsize=0.5)) # type="fan"
plotTree.barplot(trim.tree, macro.means, args.barplot=list(beside=TRUE, border=F))

dotTree(trim.tree, macro.means, ftype="i")

Fitting Models of Trait Evolution

Correlative models like the environmental models in RPANDA will be sensitive to the amount of smoothing to the trend line of the input data (see Clavel & Morlon, PNAS). To address this, we’ll create a function that searches for the optimum smoothness of the trend by fitting a set of values.

best.smoothing <- function (phy, trait.data, time.data=InfTemp, degrees=c(0,10,20,30,40,50), model="EnvExp", cores=6) {
  res.list <- mclapply(1:length(degrees), function(x) {
    fit_t_env(phy, trait.data, env_data=time.data, df=degrees[x], scale=F, plot=T, model=model)}, mc.cores = cores)
  for(i in 1:length(res.list)){res.list[[i]]$df <- degrees[i]}
  res.values <- unlist(lapply(res.list, function(x) x$aicc)) # make a vector of the values, so we can get the index number of the best
  best.res <- res.list[[which.min(res.values)]]
  
  plot(best.res, main=paste(model, "; AICc = ", round(best.res$aicc,2)), sub=paste("sigma = ",round(best.res$param[1],2), " beta = ",round(best.res$param[2],2), " df = ", best.res$df), col="red")
  
  return(list(all.results=res.list, best.result=best.res, best.df=best.res$df))
}

We can have a look at what this smoothing actually does to our data. We can come back to look at these once we get the optimum fits for our models.

#grass.spline0  <- sm.spline(x=enviro$time.samples, y=enviro$recon.samples, df=0)
#grass.spline10 <- sm.spline(x=enviro$time.samples, y=enviro$recon.samples, df=10)
#grass.spline20 <- sm.spline(x=enviro$time.samples, y=enviro$recon.samples, df=20)
grass.spline30 <- sm.spline(x=enviro$time.samples, y=enviro$recon.samples, df=30)
grass.spline40 <- sm.spline(x=enviro$time.samples, y=enviro$recon.samples, df=40)
grass.spline50 <- sm.spline(x=enviro$time.samples, y=enviro$recon.samples, df=50)

plot(enviro, main="C4 Grass Reconstruction Through Time", xlab="Millions of Years Ago", ylab="Percent C4")
#lines(grass.spline0, col="red", lwd=4)
#lines(grass.spline10, col="green", lwd=4)
#lines(grass.spline20, col="blue", lwd=4)
lines(grass.spline30, col="yellow", lwd=4)
lines(grass.spline40, col="violet", lwd=4)
lines(grass.spline50, col="gold", lwd=4)

data(InfTemp)
env.spline0 <- sm.spline(x=InfTemp$Age, y=InfTemp$Temperature, df=0)
env.spline10 <- sm.spline(x=InfTemp$Age, y=InfTemp$Temperature, df=10)
env.spline20 <- sm.spline(x=InfTemp$Age, y=InfTemp$Temperature, df=20)
env.spline30 <- sm.spline(x=InfTemp$Age, y=InfTemp$Temperature, df=30)
env.spline40 <- sm.spline(x=InfTemp$Age, y=InfTemp$Temperature, df=40)
env.spline50 <- sm.spline(x=InfTemp$Age, y=InfTemp$Temperature, df=50)

plot(InfTemp, main="Paleotemperature Through Time")
lines(env.spline0, col="red")
lines(env.spline10, col="green")
lines(env.spline20, col="blue")
lines(env.spline30, col="yellow")
lines(env.spline40, col="magenta")
lines(env.spline50, col="cyan")

flux.spline0  <- sm.spline(x=flux.data$Age, y=flux.data$A_Flux, df=0)
flux.spline10 <- sm.spline(x=flux.data$Age, y=flux.data$A_Flux, df=10)
flux.spline20 <- sm.spline(x=flux.data$Age, y=flux.data$A_Flux, df=20)
flux.spline30 <- sm.spline(x=flux.data$Age, y=flux.data$A_Flux, df=30)
flux.spline40 <- sm.spline(x=flux.data$Age, y=flux.data$A_Flux, df=40)
flux.spline50 <- sm.spline(x=flux.data$Age, y=flux.data$A_Flux, df=50)

plot(flux.data, main="Aeolian Dust Flux Through Time", xlab="Millions of Years Ago", ylab="Aeolian Dust Flux")
lines(flux.spline0, col="red", lwd=4)
lines(flux.spline10, col="green", lwd=4)
lines(flux.spline20, col="blue", lwd=4)
lines(flux.spline30, col="yellow", lwd=4)
lines(flux.spline40, col="magenta", lwd=4)
lines(flux.spline50, col="cyan", lwd=4)

Next we’ll fit a number of models to our tree and data

Start with the environmental model of paleotemperature. You can designate the number of cores and the amount of smoothing

# linear model first
ENVexp <- best.smoothing(trim.tree, macro.means, time.data=InfTemp, degrees=c(10,20,30,40,50), model="EnvExp", cores=5)

# exponential model next
ENVlin <- best.smoothing(trim.tree, macro.means, time.data=InfTemp, degrees=c(10,20,30,40,50), model="EnvLin", cores=5)

Next up the grass model, using C4 reconstructions.

GRASSexp <- best.smoothing(trim.tree, macro.means, time.data=enviro, degrees=c(30,40,50), model="EnvExp", cores=3)

GRASSlin <- best.smoothing(trim.tree, macro.means, time.data=enviro, degrees=c(30,40,50), model="EnvLin", cores=3)

And finally the flux models, using aeolian dust measurements.

FLUXexp <- best.smoothing(trim.tree, macro.means, time.data=flux.data, degrees=c(10,20,30,40,50), model="EnvExp", cores=5)

FLUXlin <- best.smoothing(trim.tree, macro.means, time.data=flux.data, degrees=c(10,20,30,40,50), model="EnvLin", cores=5)

Lastly, for comparison, run a few standard models. These are Brownian Motion, Brownian Motion with a Trend, and Early Burst.

BM_res <- fitContinuous(trim.tree, species.means, model="BM")
trend_res <- fitContinuous(trim.tree, species.means, model="trend")
EB_res <- fitContinuous(trim.tree, species.means, model="EB")

Compare the models with AICc, and check differences across the trees

model_FIT <- c(ENVexp$best.result$aicc, ENVlin$best.result$aicc, 
                  GRASSexp$best.result$aicc, FLUXexp$best.result$aicc, FLUXlin$best.result$aicc, #GRASSlin$best.result$aicc, 
                  BM_res$opt$aicc, trend_res$opt$aicc, EB_res$opt$aicc); names(model_FIT) <- c("ENVexp", "ENVlin", "GRASSexp", "FLUXexp", "FLUXlin", "BM", "Trend", "EB")
aic.w(model_FIT)
##   ENVexp   ENVlin GRASSexp  FLUXexp  FLUXlin       BM    Trend       EB 
##        0        0        1        0        0        0        0        0
fit.aic <- as.data.frame(as.vector(aic.w(model_FIT))); fit.aic$model <- names(model_FIT); colnames(fit.aic) <- c("aiccw", "model"); fit.aic$age <- "Min_Ages"

Quickly collapse models from the same data

fit.aic$model.type <- c("ENV", "ENV", "GRASS", "FLUX", "FLUX", "BM", "Trend", "EB")
fit.aic$model.type <- factor(fit.aic$model.type, levels=c("ENV", "FLUX", "GRASS", "BM", "EB", "Trend"))

Then plot the model fits as AICc weights

library(wesanderson)
(ggplot(fit.aic)
  + geom_bar(aes(y=aiccw, x=age, fill=model.type), stat="identity")
  #+ theme(axis.text.x=element_text(angle=25, hjust=1), panel.background=element_blank(), legend.position="bottom")
  + theme_classic()
  + scale_fill_manual( values=wes_palette("Zissou1", 6, "continuous")))

We can further look at our data by plottin Disparity Through Time to our data Then we can simulate data under the preferred model (GRASSexp or GRASSlin) to see if our data fits the expectation.

simGRASSexp <- sim_t_env(trim.tree, GRASSexp$best.result$param, model="EnvExp", env_data=enviro,
          root.value=GRASSexp$best.result$root, plot=T)

simGRASSlin <- sim_t_env(trim.tree, GRASSlin$best.result$param, model="EnvLin", env_data=enviro,
                         root.value=GRASSlin$best.result$root, plot=T)

And again run DTT on our simulated data to see if the patterns hold up.

dttGRASSexp <- dtt(trim.tree, simGRASSexp, nsim=100, plot=T) 

dttGRASSlin <- dtt(trim.tree, simGRASSlin, nsim=100, plot=T)

# obviously you'll increase the nsim to >1000.

Fitting Models to Our Data as a Function of Time

Ok, now that we’ve fit the models to a given tree, we want to fit the models to lots of trees of different ages, shapes, etc. Let’s read in a file I’ve made of trees from the whole span of dating analyses.

tree.span <- read.tree("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/FossilUncertainty/Tree_Span.tre")
tree.span
## 79 phylogenetic trees

Now we’ll run a loop across all of these trees, to fit the models to each one. It may take a little while.

# MAKE THIS EVAL=TRUE IF YOU WANT TO DO THIS BIT FOR REAL
all.aics <- NULL; all.results <- NULL
for (k in 1:length(tree.span)){
  beginning <- Sys.time()
  int.results <- NULL
  
  # Fit a number of models to the data (ENV, GRASS, BM, EB, Trend, Drift)
  ENVexp <- best.smoothing(tree.span[[k]], macro.means, time.data=InfTemp, degrees=c(10,20,30,40,50), model="EnvExp", cores=5); int.results[["ENVexp"]] <- ENVexp$best.result; 
  ENVlin <- best.smoothing(tree.span[[k]], macro.means, time.data=InfTemp, degrees=c(10,20,30,40,50), model="EnvLin", cores=5); int.results[["ENVlin"]] <- ENVlin$best.result; 
  
  GRASSexp <- best.smoothing(tree.span[[k]], macro.means, time.data=testo, degrees=c(30,50), model="EnvExp", cores=3); int.results[["GRASSexp"]] <- GRASSexp$best.result; 
  #GRASSlin <- best.smoothing(trim.tree, macro.means, time.data=testo, degrees=c(30,40,50), model="EnvLin", cores=3)
  
  FLUXexp <- best.smoothing(tree.span[[k]], macro.means, time.data=flux.data, degrees=c(10,20,30,40,50), model="EnvExp", cores=5); int.results[["FLUXexp"]] <- FLUXexp$best.result; 
  FLUXlin <- best.smoothing(tree.span[[k]], macro.means, time.data=flux.data, degrees=c(10,20,30,40,50), model="EnvLin", cores=5); int.results[["FLUXlin"]] <- FLUXlin$best.result; 
  
  BM_res    <- fitContinuous(tree.span[[k]], species.means, model="BM"); int.results[["BM"]] <- BM_res
  trend_res <- fitContinuous(tree.span[[k]], species.means, model="trend"); int.results[["Trend"]] <- trend_res
  EB_res    <- fitContinuous(tree.span[[k]], species.means, model="EB"); int.results[["EB"]] <- EB_res
  
  curr_tree_FIT <- c(ENVexp$best.result$aicc, ENVlin$best.result$aicc, 
                    GRASSexp$best.result$aicc, FLUXexp$best.result$aicc, FLUXlin$best.result$aicc, #GRASSlin$best.result$aicc,
                    BM_res$opt$aicc, trend_res$opt$aicc, EB_res$opt$aicc); names(curr_tree_FIT) <- c("ENVexp", "ENVlin", "GRASSexp", "FLUXexp", "FLUXlin", "BM", "Trend", "EB")
  curr.aic <- as.data.frame(as.vector(aic.w(curr_tree_FIT))); 
        curr.aic$model <- names(curr_tree_FIT); colnames(curr.aic) <- c("aiccw", "model"); 
            curr.aic$age <- round(max(nodeHeights(tree.span[[k]])), 3)
                curr.aic$tree <- k
  all.results[[k]] <- int.results
  
  curr.aic$model.type <- c("ENV", "ENV", "GRASS", "FLUX", "FLUX", "BM", "Trend", "EB")
  
  # ENVexp <- ENVexp$best.result;  ENVlin <- ENVlin$best.result; GRASSexp <- GRASSexp$best.result; FLUXexp <- FLUXexp$best.result;  FLUXlin <- FLUXlin$best.result
  # curr.aic[which(curr.aic$aiccw == max(curr.aic$aiccw)),"model"]
  
  all.aics <- rbind.data.frame(all.aics, curr.aic); 
  
  end <- Sys.time()
  duration <- format(end-beginning)
  print(paste("Computation time :", duration))
}

Save the file externally:

# MAKE THIS EVAL=TRUE IF YOU WANT TO DO THIS BIT FOR REAL
all.aics$age <- factor(all.aics$age, levels=c("Min_Ages", "Mean_Ages", "Max_Ages"))
all.aics$model.type <- factor(all.aics$model.type, levels=c("ENV", "FLUX", "GRASS", "BM", "EB", "Trend"))
#saveRDS(all.aics, file="/Users/Ian/Desktop/Macropod_Dating/Model_Fitting_AICCs.RDS")

Or skip the work and read in the file instead:

all.aics <- readRDS("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Model_Fitting.RDS")
all.aics$age <- as.factor(all.aics$age)
(ggplot(all.aics)
  + geom_bar(aes(y=aiccw, x=age, fill=model.type), stat="identity")
  + theme(axis.text.x=element_text(angle=90, hjust=1), panel.background=element_blank(), legend.position="bottom")
  + scale_fill_manual( values=wes_palette("Zissou1", 6, "continuous")))

Comparing Estimated Node Ages Across Dating Schemes

Make a quick function to get the DEPTH of a node (from present), instead of the HEIGHT (from root)

MRCA.depth <- function(phy){max(nodeHeights(phy)) - findMRCA(phy, tips=c("Setonix_brachyurus", "Wallabia_bicolor"), type="height")}

Prepare trees for comparison of ages

age.cp.min  <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Operators/Macro_CPMinAges_Constrained_PF/Macro_CP_MinAges_NEWICK.trees"); age.cp.min <- age.cp.min[(length(age.cp.min)-200):length(age.cp.min)]
cp.min <- as.data.frame(unlist(lapply(age.cp.min, MRCA.depth))); cp.min$tree <- "cp.min"; colnames(cp.min) <- c("age", "tree")

age.cp.mean <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Operators/Macro_CPEstAges_Constrained_PF/Macro_CP_EstAges_NEWICK.trees"); age.cp.mean <- age.cp.mean[(length(age.cp.mean)-200):length(age.cp.mean)]
cp.est <- as.data.frame(unlist(lapply(age.cp.mean, MRCA.depth))); cp.est$tree <- "cp.est"; colnames(cp.est) <- c("age", "tree")

age.cp.max <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Operators/Macro_CPMaxAges_Constrained_PF/Macro_CP_MaxAges_NEWICK.trees"); age.cp.max <- age.cp.max[(length(age.cp.max)-200):length(age.cp.max)]
cp.max <- as.data.frame(unlist(lapply(age.cp.max, MRCA.depth))); cp.max$tree <- "cp.max"; colnames(cp.max) <- c("age", "tree")

age.range.strict <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/Operators/Macro_AgesRanges_LinksCorrected_STRICT/Macro_AgesRanges_LinksCorrected_Newick.trees"); age.range.strict <- age.range.strict[(length(age.range.strict)-200):length(age.range.strict)]
range.strict <- as.data.frame(unlist(lapply(age.range.strict, MRCA.depth))); range.strict$tree <- "range.strict"; colnames(range.strict) <- c("age", "tree")

age.all <- rbind(cp.min, cp.est, cp.max, range.strict)
age.all$tree <- factor(age.all$tree, levels=c("cp.max", "range.strict", "cp.est", "cp.min"))
(ggplot(age.all, aes(x=age, fill=tree))
  + geom_density(alpha=0.75, adjust=1.5)
  + theme(axis.text.x=element_text(angle=0, hjust=1), panel.background=element_blank(), legend.position="bottom")
  + scale_fill_manual(values=wes_palette("Zissou1", type="continuous", 4))
  + scale_x_reverse(lim=c(19,9)))

Getting Bayes Factors for Fossil Taxa

Investigating Fossil Taxa as putative Sampled Ancestors or Terminals

prior.trees <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/FossilUncertainty/Prior_Only_NEWICK.trees"); prior.trees <- prior.trees[(length(prior.trees)-200):length(prior.trees)]
post.trees <- read.nexus("/Users/Ian/Google.Drive/ANU Herp Work/Macropod_Dating/FossilUncertainty/Macro_CP_EstAges_NEWICK.trees"); post.trees <- post.trees[(length(post.trees)-200):length(post.trees)]

Choose which tips you want information for:

fossil_taxa <- c("Baringa_nelsonensis", "Bohra_illuminata", "Congruus_congruus",
                 "Dorcopsoides_fossilis", "Ganguroo_bilamina","Hadronomas_puckridgi",
                 "Kurrabi_mahoneyi","Macropus_pavana","Ngamaroo_archeri",
                 "Bulungamaya_delicata","Prionotemnus_palankarinnicus",
                 "Procoptodon_goliah","Protemnodon_anak","Simosthenurus_occidentalis",
                 "Sthenurus_andersoni","Troposodon_minor","Wanburoo_hilarus")

Get the node numbers of the tips

nodes <- sapply(fossil_taxa,function(x,y) which(y==x),y=tree$tip.label)

Then get the edge lengths for those nodes

edge.lengths <- setNames(tree$edge.length[sapply(nodes,
                                               function(x,y) which(y==x),y=tree$edge[,2])],names(nodes))

The faster way is to make a function to do this:

get_terminal_branchlengths <- function(phy, tipnames){
  ## Get the node numbers of the tips
  nodes <- sapply(tipnames,function(x,y) which(y==x),y=phy$tip.label)
  ## Then get the edge lengths for those nodes
  edge.lengths <- setNames(phy$edge.length[sapply(nodes,
                                                   function(x,y) which(y==x),y=phy$edge[,2])],names(nodes))
  return(edge.lengths)
}

Now that we’ve got the tips and branch lengths, we can compare the posterior to the prior

BFSA <- function(prior.phy, posterior.phy, tips){
  post <- lapply(posterior.phy, get_terminal_branchlengths, tipnames=tips); names(post) <- NULL; post <- unlist(post)
  prior <- lapply(prior.phy,    get_terminal_branchlengths, tipnames=tips); names(prior)<- NULL; prior <- unlist(prior)
  
  BFs <- NULL
  for (j in 1:length(tips)){
    curr.tip <- subset(post, names(post)==tips[j]); 
    probSA <- sum(curr.tip<=0); probTIP <- length(curr.tip)-probSA;
    
    curr.tip <- subset(prior, names(prior)==tips[j]);
    priorSA <- sum(curr.tip<=0); priorTIP <- length(curr.tip)-priorSA;
    
    curr.BF <- log((probSA * priorTIP) / (probTIP * priorSA))
    if(is.na(curr.BF)){curr.BF <- 0}
    
    #curr.BF <- log(probSA/(length(curr.tip)-probSA))
    names(curr.BF) <- tips[j]; curr.BF <- round(curr.BF, 2)
    BFs <- append(BFs, curr.BF)
  }
  return(BFs)
}

Orient the data appropriately

macro_BFs <- BFSA(prior.trees, post.trees, tips=fossil_taxa)

macro_BFs <- as.data.frame(macro_BFs) # make the vector a data frame
macro_BFs[which(macro_BFs$macro_BFs > 5),] <- 5 # change any really big (INF) numbers to 5
macro_BFs[which(macro_BFs$macro_BFs < -5),] <- -5 # change any really small (-INF) numbers to -5

macro_BFs$taxa <- rownames(macro_BFs); # create column with the taxon names
macro_BFs <- macro_BFs[order(macro_BFs$macro_BFs),] # reorder by BF values
# set colors for plotting
macro_BFs$color <- "black";
macro_BFs[which(macro_BFs$macro_BFs > 1),]$color <- "#3d98d3"
macro_BFs[which(macro_BFs$macro_BFs < -1),]$color <- "#FF7175"

macro_BFs$taxa <- factor(macro_BFs$taxa, levels=c(macro_BFs$taxa)) # set factors for plotting (NOT NECESSARY)

Then plot the data

ggplot(macro_BFs, aes(x=taxa, y=macro_BFs, label=macro_BFs)) + 
  geom_ribbon(ymin=-1, ymax=+1) +
  geom_point(stat='identity', size=8, color=macro_BFs$color)  +
  # geom_segment(aes(y = 0, 
  #                  x = taxa, 
  #                  yend = macro_BFs, 
  #                  xend = taxa), 
  #              color = macro_BFs$color) +
  geom_text(color="white", size=2) +
  #labs(title="Bayes Factor Support", subtitle="for Fossil Taxa as Sampled Ancestors") + 
  #ylim(-5, 5) +
  scale_y_continuous(name="log Bayes Factors", limits=c(-5,5), breaks=c(-5:5)) +
  #theme(panel.background=element_blank()) +
  geom_hline(yintercept=-1) +
  geom_hline(yintercept=1) +
  xlab("Fossil Taxa") +
  #ylab("log Bayes Factors") +
  theme_classic() +
  coord_flip()

Plot the C4 and Flux data on a geological time scale

library(deeptime); library(gridExtra)

pp <- ggplot(enviro.data, aes(Age)) +
  geom_ribbon(aes(ymin = C4_recon_lower, ymax = C4_recon_upper), fill = "pink") +
  geom_line(aes(y = C4_recon_mean), color="red") + scale_x_reverse() + theme_classic() +
  coord_cartesian(xlim = c(0, 10), ylim = c(0,80), expand = FALSE) 

qq <- gggeo_scale(pp, dat="epochs")


rr <- ggplot(flux.data, aes(Age)) +
  geom_ribbon(aes(ymin = A_Flux-35, ymax = A_Flux+35), fill = "light blue") +
  geom_line(aes(y = A_Flux), color="blue") + scale_x_reverse() + theme_classic() +
  coord_cartesian(xlim = c(0, 13), ylim = c(0,150), expand = FALSE) 

ss <- gggeo_scale(rr, dat="epochs")

grid.arrange(qq, ss, nrow=1)

Investigating estimated ages of fossils

Create a function to pull the ages of each fossil taxon estimated

get.fossil.ages <- function(fossil.tips, trees){
  tree.tables <- lapply(trees, print.tree)
  fossil.tables <- lapply(1:length(tree.tables), function(x) {
    subset(tree.tables[[x]], tree.tables[[x]]$label %in% fossil.tips)
  })
  fossil.ages <- lapply(1:length(fossil.tables), function(x) {
    select(fossil.tables[[x]], label, time_bp)
  })
  final <- bind_rows(fossil.ages)
}

Need to create a function called “print.tree” that I borrowed some code from biogeoBEARS

Now pull out the info on those fossils

my.test <- get.fossil.ages(fossil.tips = fossil_taxa, trees = post.trees)
my.test$label <- factor(my.test$label, levels=c("Bulungamaya_delicata",
                                                "Protemnodon_anak",
                                                "Sthenurus_andersoni",
                                                "Troposodon_minor",
                                                "Baringa_nelsonensis",
                                                "Prionotemnus_palankarinnicus",
                                                "Congruus_congruus",
                                                "Bohra_illuminata",
                                                "Kurrabi_mahoneyi",
                                                "Ngamaroo_archeri",
                                                "Dorcopsoides_fossilis",
                                                "Wanburoo_hilarus",
                                                "Ganguroo_bilamina",
                                                "Hadronomas_puckridgi",
                                                "Simosthenurus_occidentalis",
                                                "Macropus_pavana",
                                                "Procoptodon_goliah"))

(ggplot(my.test, aes(x=time_bp, y=label, fill=..x..)) 
  + scale_fill_gradientn(colours=wes_palette("Zissou1"))
  + geom_density_ridges_gradient(scale=2)
  + scale_x_reverse()
  + theme_classic())
## Picking joint bandwidth of 2.28e-07